In [16]:
import pandas as pd
import numpy as np
from wordcloud import WordCloud
import matplotlib.pyplot as plt
In [2]:
# Load the "Sheet1" worksheet of the workbook into a DataFrame.
df = pd.read_excel(
    "RA_change reason.xlsx",
    sheet_name="Sheet1",
)
In [3]:
# Inspect the loaded frame; the rich display truncates it to the first and last rows.
df
Out[3]:
cik year new_peers old_peers turnover_ratio Quote Category Change url url_prev
0 25445 2007 4 19 0.173913 The Committee reviewed two potential peer grou... revenues, net income, market capitalization, d... NaN https://www.sec.gov/Archives/edgar/data/25445/... https://www.sec.gov/Archives/edgar/data/25445/...
1 7536 2007 23 12 0.657143 The list differs from the “Peer Group” used fo... size and scope, publicly-traded, historical co... NaN https://www.sec.gov/Archives/edgar/data/7536/0... https://www.sec.gov/Archives/edgar/data/7536/0...
2 875320 2007 7 7 0.500000 We select the companies for this comparator gr... industry,operating expenses and market capital... NaN https://www.sec.gov/Archives/edgar/data/875320... https://www.sec.gov/Archives/edgar/data/875320...
3 310569 2007 10 34 0.227273 Each year the Committee reviews and considers ... base salary, target bonus, total cash, long-te... NaN https://www.sec.gov/Archives/edgar/data/310569... https://www.sec.gov/Archives/edgar/data/310569...
4 55242 2007 13 23 0.361111 We included many of the companies in the peer ... revenue, operational scope, organizational com... NaN https://www.sec.gov/Archives/edgar/data/55242/... https://www.sec.gov/Archives/edgar/data/55242/...
... ... ... ... ... ... ... ... ... ... ...
195 59527 2018 4 21 0.160000 We use a peer group of publicly traded industr... us-based, revenue, compete for talent, shareho... NaN https://www.sec.gov/Archives/edgar/data/59527/... https://www.sec.gov/Archives/edgar/data/59527/...
196 1103982 2018 3 25 0.107143 In constructing our Compensation Survey Peer G... Revenue, market capitalization, industry, mark... NaN https://www.sec.gov/Archives/edgar/data/110398... https://www.sec.gov/Archives/edgar/data/110398...
197 895419 2018 2 17 0.105263 The Committee, assisted by Radford, selects Cr... Business, size, revenue, market capitalization... NaN https://www.sec.gov/Archives/edgar/data/895419... https://www.sec.gov/Archives/edgar/data/895419...
198 915840 2018 1 11 0.083333 These companies were chosen because, in additi... size acquired https://www.sec.gov/Archives/edgar/data/915840... https://www.sec.gov/Archives/edgar/data/915840...
199 1057706 2018 4 9 0.307692 Industry survey data was collected from bankin... Asset size, compete for talent NaN https://www.sec.gov/Archives/edgar/data/105770... https://www.sec.gov/Archives/edgar/data/105770...

200 rows × 10 columns

In [60]:
# Concatenate all text in the 'Category' column into a single corpus.
# Missing cells are dropped first: str(NaN) would otherwise inject the literal
# token "nan" into the corpus and let it show up as a word in the cloud.
category_values = df['Category'].dropna().astype(str)
text = " ".join(category_values)
In [66]:
# Generate the word cloud from the concatenated text.
# random_state pins the random number generator used for word placement and
# colouring, so re-running the cell reproduces the same image.
wordcloud = WordCloud(
    width=800,
    height=400,
    background_color='white',
    colormap='plasma',
    max_words=100,
    min_font_size=10,
    random_state=42,
).generate(text)
In [67]:
# Render the word cloud on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(10, 5))
ax.imshow(wordcloud, interpolation='bilinear')
ax.axis('off')  # hide ticks and frame; the image has no meaningful axes
plt.show()
In [ ]: